
#delimit ;
* Session setup: empty workspace, no paging of output, no log left open;
clear;
clear matrix;
set memory 1000m;
set more off;
capture log close;

* Directory globals used by every use, save and merge below;
* Each value is prepended directly to a file name, so it presumably has to;
* end with a path separator - confirm when filling in the placeholders;
#delimit ;
global MY_OUT_PATH  "[path where your master data set is stored]";
global MY_TEMP_PATH "[path where your temporary files are stored]";
global MY_FIGURE_PATH "[path where your figures and corresponding data sets are stored]";

* Load the master sample;
#delimit ;
use "${MY_OUT_PATH}NEW_CNEF3_LSsample.dta", clear;

*********************************************;
* Labor force status of non-working persons;
* lfp in the data refers to the current year, but the restriction below is;
* applied to the status of the previous year. Each lfp value is therefore;
* shifted to year+1, stored as lfp_new and merged back on persnr and year;
*********************************************;

#delimit ;
preserve;
keep persnr year lfp;
replace year = year + 1;
rename lfp lfp_new;
save "${MY_TEMP_PATH}changed_lfp", replace;
restore;

* keep(master match) prevents the merge from appending shifted rows that have;
* no counterpart in the sample. Such rows would hold nothing but persnr, year;
* and lfp_new. nogenerate replaces the former drop of _merge;
merge 1:1 persnr year using "${MY_TEMP_PATH}changed_lfp", keep(master match) nogenerate;
drop lfp;

************************************;
* Drop non-working persons whose lagged labor force status is missing or not 1;
* The lagged variable is referenced by its full name lfp_new. The earlier;
* spelling lfp only resolved to it through variable-name abbreviation after;
* lfp itself had been dropped, which fails when abbreviation is switched off;
* or when another variable starts with lfp;
#delimit ;
drop if working == 0 & (missing(lfp_new) | lfp_new != 1);

* Rebuild logwage from wage. It is defined for working persons only and;
* stays missing for everyone else;
#delimit ;
drop logwage;
generate logwage = ln(wage) if working == 1;

* Second version that keeps non-working persons with a value of 0;
#delimit ;
generate logwage_incl_unemp = logwage;
replace logwage_incl_unemp = 0 if working == 0;
* There is no labor force participation information for 1983, so the version;
* that includes the unemployed is set to missing for that year;
replace logwage_incl_unemp = . if year == 1983;

* Restrict the sample to ages 25 through 60. Missing ages are dropped as well;
#delimit ;
keep if inrange(age, 25, 60);


**********************************************************************;
* Working persons are kept only with a credible wage, i.e. a wage of at least 3;
**********************************************************************;

#delimit ;
drop if working == 1 & wage < 3;


************************************************;
* Trim both log wage variables above p99.50;
* The cutoff is the median of the observations at or above the 99th;
* percentile. All percentiles here are unweighted and are stored as;
* constant variables so that they remain available after this section;
************************************************;

* Percentiles of logwage;
#delimit ;
summarize logwage, detail;
generate p99logwage = r(p99);
generate p1logwage = r(p1);
* Lower quartile and median within the bottom 1 percent;
summarize logwage if logwage <= p1logwage, detail;
generate p025logwage = r(p25);
generate p050logwage = r(p50);
* Median and upper quartile within the top 1 percent;
#delimit ;
summarize logwage if logwage >= p99logwage, detail;
generate p9950logwage = r(p50);
generate p9975logwage = r(p75);

* Missing values compare as larger than any number, so they are excluded;
* explicitly to keep non-working persons in the data;
drop if logwage > p9950logwage & !missing(logwage);

* Same percentiles for logwage_incl_unemp, computed on the trimmed sample;
#delimit ;
summarize logwage_incl_unemp, detail;
generate p99logwage_incl_unemp = r(p99);
generate p1logwage_incl_unemp = r(p1);
summarize logwage_incl_unemp if logwage_incl_unemp <= p1logwage_incl_unemp, detail;
generate p025logwage_incl_unemp = r(p25);
generate p050logwage_incl_unemp = r(p50);
#delimit ;
summarize logwage_incl_unemp if logwage_incl_unemp >= p99logwage_incl_unemp, detail;
generate p9950logwage_incl_unemp = r(p50);
generate p9975logwage_incl_unemp = r(p75);

* The guard now tests the variable that is being compared. It used to test;
* logwage, so every row with a missing logwage_incl_unemp and a nonmissing;
* logwage was dropped, which removed all working persons of 1983;
* NOTE(review): if 1983 is meant to be excluded, add an explicit drop by year;
drop if logwage_incl_unemp > p9950logwage_incl_unemp & !missing(logwage_incl_unemp);


*****************************;
* Wage percentile ratios by year;
*****************************;

* Working persons only: p10, p50 and p90 of wage per year, weighted by w11105;
* The p90/p50 and p10/p50 ratios are listed and saved as figure data, after;
* which the person-level data are restored;
#delimit ;
preserve;
collapse (p10) p10wage=wage (p50) p50wage=wage (p90) p90wage=wage
    if working==1 [pweight=w11105], by(year);
generate p9050wage = p90wage / p50wage;
label variable p9050wage "ohne AL";
generate p1050wage = p10wage / p50wage;
label variable p1050wage "ohne AL";
sort year;
list;
save "${MY_FIGURE_PATH}wagepercentileratios_p9950.dta", replace;
restore;

* Same percentiles and ratios without the working filter, i.e. over all;
* remaining observations, again weighted by w11105;
#delimit ;
preserve;
collapse (p10) p10wage_incl=wage (p50) p50wage_incl=wage (p90) p90wage_incl=wage
    [pweight=w11105], by(year);
generate p9050wage_incl = p90wage_incl / p50wage_incl;
label variable p9050wage_incl "inkl. AL";
generate p1050wage_incl = p10wage_incl / p50wage_incl;
label variable p1050wage_incl "inkl. AL";
sort year;
* Attach the working-only series saved before and overwrite the figure data;
* set so that it holds both series;
merge 1:1 year using "${MY_FIGURE_PATH}wagepercentileratios_p9950.dta";
save "${MY_FIGURE_PATH}wagepercentileratios_p9950.dta", replace;
* NOTE(review): no restore follows in the visible part of the file and _merge;
* stays in the saved data set - confirm that both are intended;

